Packages
library(dplyr)
library(dygraphs)
library(forecast)
library(h2o)
library(imputeTS)
library(lubridate)
library(plotly)
library(readxl)
library(TSstudio)
## Obtain data
# set working directory to location of downloaded data files
path <-
"C:/Users/keoka/OneDrive - Texas A&M University/Courses/STAT_684/Project/Data"
setwd(path)
bf_df <- read_excel("APU0000703112.xls") # data series for beef prices
## New names:
## * `` -> ...2
## Examine data
class(bf_df) # table, data frame
## [1] "tbl_df" "tbl" "data.frame"
dim(bf_df) # 469 rows, 2 columns
## [1] 469 2
str(bf_df); summary(bf_df) # tibble with two character variables
## tibble [469 x 2] (S3: tbl_df/tbl/data.frame)
## $ FRED Graph Observations: chr [1:469] "Federal Reserve Economic Data" "Link: https://fred.stlouisfed.org" "Help: https://fredhelp.stlouisfed.org" "Economic Research Division" ...
## $ ...2 : chr [1:469] NA NA NA NA ...
## FRED Graph Observations ...2
## Length:469 Length:469
## Class :character Class :character
## Mode :character Mode :character
head(bf_df,10+5); tail(bf_df,5) # view first 5 and last 5 observations
## # A tibble: 15 x 2
## `FRED Graph Observations` ...2
## <chr> <chr>
## 1 Federal Reserve Economic Data <NA>
## 2 Link: https://fred.stlouisfed.org <NA>
## 3 Help: https://fredhelp.stlouisfed.org <NA>
## 4 Economic Research Division <NA>
## 5 Federal Reserve Bank of St. Louis <NA>
## 6 <NA> <NA>
## 7 APU0000703112 Average Price: Ground Beef, 100% Beef ~
## 8 <NA> <NA>
## 9 Frequency: Monthly <NA>
## 10 observation_date APU0000703112
## 11 30682 1.29
## 12 30713 1.3400000000000001
## 13 30742 1.3080000000000001
## 14 30773 1.331
## 15 30803 1.3009999999999999
## # A tibble: 5 x 2
## `FRED Graph Observations` ...2
## <chr> <chr>
## 1 44501 4.7160000000000002
## 2 44531 4.6040000000000001
## 3 44562 4.5540000000000003
## 4 44593 4.6299999999999999
## 5 44621 4.7569999999999997
names(bf_df) # variable names: "FRED Graph Observations" and "...2"
## [1] "FRED Graph Observations" "...2"
## Reformat data
bf_df <- bf_df[11:469,] # exclude description of data set in first 10 rows
names(bf_df) <- c("date","beef price") # rename variables
bf_df[,1] <- as.Date(as.numeric(unlist(bf_df[,1])), # character to numeric date
origin = as.Date("1899-12-30")) # align to Excel origin point
bf_df[,2] <- round(as.numeric(unlist(bf_df[,2])),3) # character to numeric values
str(bf_df); summary(bf_df) # tibble with date variable and numeric variable
## tibble [459 x 2] (S3: tbl_df/tbl/data.frame)
## $ date : Date[1:459], format: "1984-01-01" "1984-02-01" ...
## $ beef price: num [1:459] 1.29 1.34 1.31 1.33 1.3 ...
## date beef price
## Min. :1984-01-01 Min. :0.000
## 1st Qu.:1993-07-16 1st Qu.:1.430
## Median :2003-02-01 Median :1.778
## Mean :2003-01-30 Mean :2.275
## 3rd Qu.:2012-08-16 3rd Qu.:3.020
## Max. :2022-03-01 Max. :4.757
which(is.na(bf_df[,2])) # no missing values
## integer(0)
which(bf_df[,2]==0) # missing value was converted to zero value in row 346
## [1] 346
bf_df[341:350,] # view zero value in row 346
## # A tibble: 10 x 2
## date `beef price`
## <date> <dbl>
## 1 2012-05-01 3.00
## 2 2012-06-01 3.01
## 3 2012-07-01 3.08
## 4 2012-08-01 2.99
## 5 2012-09-01 3.02
## 6 2012-10-01 0
## 7 2012-11-01 3.18
## 8 2012-12-01 3.08
## 9 2013-01-01 3.41
## 10 2013-02-01 3.38
bf_df[346,2] <- NA # convert zero value back to missing (NA) value
bf_df[341:350,] # view missing value (NA) in row 346
## # A tibble: 10 x 2
## date `beef price`
## <date> <dbl>
## 1 2012-05-01 3.00
## 2 2012-06-01 3.01
## 3 2012-07-01 3.08
## 4 2012-08-01 2.99
## 5 2012-09-01 3.02
## 6 2012-10-01 NA
## 7 2012-11-01 3.18
## 8 2012-12-01 3.08
## 9 2013-01-01 3.41
## 10 2013-02-01 3.38
# impute missing value using linear interpolation
bf_df[346,2] <- na_interpolation(ts(bf_df))[346,2]
bf_df[341:350,] # view imputed value in row 346
## # A tibble: 10 x 2
## date `beef price`
## <date> <dbl>
## 1 2012-05-01 3.00
## 2 2012-06-01 3.01
## 3 2012-07-01 3.08
## 4 2012-08-01 2.99
## 5 2012-09-01 3.02
## 6 2012-10-01 3.10
## 7 2012-11-01 3.18
## 8 2012-12-01 3.08
## 9 2013-01-01 3.41
## 10 2013-02-01 3.38
## Convert data frome to univariate time series object with defined attributes
bf_ts <- ts(data = bf_df[,2], # series values
start = c(1984,1), # time of first observation: January 1, 1984
end = c(2022,3), # time of last observation: March 1, 2022
frequency = 12) # series frequency
ts_info(bf_ts) # ts object with 1 variable and 459 observations
## The bf_ts series is a ts object with 1 variable and 459 observations
## Frequency: 12
## Start time: 1984 1
## End time: 2022 3
## Obtain data
# set working directory to location of downloaded data files
path =
"C:/Users/keoka/OneDrive - Texas A&M University/Courses/STAT_684/Project/Data"
setwd(path)
ch_df = read_excel("APU0000706111.xls") # data series for chicken prices
## New names:
## * `` -> ...2
## Examine data
class(ch_df) # table, data frame
## [1] "tbl_df" "tbl" "data.frame"
dim(ch_df) # 517 rows, 2 columns
## [1] 517 2
str(ch_df); summary(ch_df) # tibble with two character variables
## tibble [517 x 2] (S3: tbl_df/tbl/data.frame)
## $ FRED Graph Observations: chr [1:517] "Federal Reserve Economic Data" "Link: https://fred.stlouisfed.org" "Help: https://fredhelp.stlouisfed.org" "Economic Research Division" ...
## $ ...2 : chr [1:517] NA NA NA NA ...
## FRED Graph Observations ...2
## Length:517 Length:517
## Class :character Class :character
## Mode :character Mode :character
head(ch_df,10+5); tail(ch_df,5) # view first 5 and last 5 observations
## # A tibble: 15 x 2
## `FRED Graph Observations` ...2
## <chr> <chr>
## 1 Federal Reserve Economic Data <NA>
## 2 Link: https://fred.stlouisfed.org <NA>
## 3 Help: https://fredhelp.stlouisfed.org <NA>
## 4 Economic Research Division <NA>
## 5 Federal Reserve Bank of St. Louis <NA>
## 6 <NA> <NA>
## 7 APU0000706111 Average Price: Chicken, Fresh, Whole (~
## 8 <NA> <NA>
## 9 Frequency: Monthly <NA>
## 10 observation_date APU0000706111
## 11 29221 0.69899999999999995
## 12 29252 0.67300000000000004
## 13 29281 0.65500000000000003
## 14 29312 0.63800000000000001
## 15 29342 0.628
## # A tibble: 5 x 2
## `FRED Graph Observations` ...2
## <chr> <chr>
## 1 44501 1.583
## 2 44531 1.6060000000000001
## 3 44562 1.6220000000000001
## 4 44593 1.6319999999999999
## 5 44621 1.724
names(ch_df) # variable names: "FRED Graph Observations" and "...2"
## [1] "FRED Graph Observations" "...2"
## Reformat data
ch_df <- ch_df[11:517,] # exclude description of data set in first 10 rows
names(ch_df) <- c("date","chicken price") # rename variables
ch_df[,1] <- as.Date(as.numeric(unlist(ch_df[,1])), # character to numeric date
origin = as.Date("1899-12-30")) # align to Excel origin point
ch_df[,2] <- round(as.numeric(unlist(ch_df[,2])),3) # character to numeric values
str(ch_df); summary(ch_df) # tibble with date variable and numeric variable
## tibble [507 x 2] (S3: tbl_df/tbl/data.frame)
## $ date : Date[1:507], format: "1980-01-01" "1980-02-01" ...
## $ chicken price: num [1:507] 0.699 0.673 0.655 0.638 0.628 0.64 0.71 0.751 0.791 0.792 ...
## date chicken price
## Min. :1980-01-01 Min. :0.000
## 1st Qu.:1990-07-16 1st Qu.:0.884
## Median :2001-02-01 Median :1.048
## Mean :2001-01-30 Mean :1.092
## 3rd Qu.:2011-08-16 3rd Qu.:1.304
## Max. :2022-03-01 Max. :1.747
which(is.na(ch_df[,2])) # no missing values
## integer(0)
which(ch_df[,2]==0) # missing value was converted to zero value in row 485
## [1] 485
ch_df[481:490,] # view zero value in row 485
## # A tibble: 10 x 2
## date `chicken price`
## <date> <dbl>
## 1 2020-01-01 1.41
## 2 2020-02-01 1.36
## 3 2020-03-01 1.4
## 4 2020-04-01 1.57
## 5 2020-05-01 0
## 6 2020-06-01 1.75
## 7 2020-07-01 1.71
## 8 2020-08-01 1.61
## 9 2020-09-01 1.54
## 10 2020-10-01 1.58
ch_df[485,2] <- NA # convert zero value back to missing (NA) value
ch_df[481:490,] # view missing value (NA) in row 485
## # A tibble: 10 x 2
## date `chicken price`
## <date> <dbl>
## 1 2020-01-01 1.41
## 2 2020-02-01 1.36
## 3 2020-03-01 1.4
## 4 2020-04-01 1.57
## 5 2020-05-01 NA
## 6 2020-06-01 1.75
## 7 2020-07-01 1.71
## 8 2020-08-01 1.61
## 9 2020-09-01 1.54
## 10 2020-10-01 1.58
# impute missing value using linear interpolation
ch_df[485,2] <- na_interpolation(ts(ch_df))[485,2]
ch_df[481:490,] # view imputed value in row 485
## # A tibble: 10 x 2
## date `chicken price`
## <date> <dbl>
## 1 2020-01-01 1.41
## 2 2020-02-01 1.36
## 3 2020-03-01 1.4
## 4 2020-04-01 1.57
## 5 2020-05-01 1.66
## 6 2020-06-01 1.75
## 7 2020-07-01 1.71
## 8 2020-08-01 1.61
## 9 2020-09-01 1.54
## 10 2020-10-01 1.58
## Convert data frome to univariate time series object with defined attributes
ch_ts <- ts(data = ch_df[,2], # series values
start = c(1980,1), # time of first observation: January 1, 1980
end = c(2022,3), # time of last observation: March 1, 2022
frequency = 12) # series frequency
ts_info(ch_ts) # ts object with 1 variable and 507 observations
## The ch_ts series is a ts object with 1 variable and 507 observations
## Frequency: 12
## Start time: 1980 1
## End time: 2022 3
Exploratory data analysis
## Plot time series with "plot" function from R built-in "graphics" package
plot.ts(bfch_ts, # multivariate time series object
plot.type = "multiple",
main = "U.S. Average Price of Beef vs. Chicken",
ylab = c("Beef","Chicken"),
xlab = "Years")

plot.ts(bfch_ts, # multivariate time series object
plot.type = "single",
main = "U.S. Average Price of Beef vs. Chicken",
ylab = c("Beef","Chicken"),
xlab = "Years")

## Plot time series with "ts_plot" function from "TSstudio" package
ts_plot(bfch_ts, # multivariate time series object
type = "multiple",
title = "U.S. Average Price of Beef vs. Chicken",
Ytitle = "Cost per Pound",
Xtitle = "Years",
Xgrid = TRUE,
Ygrid = TRUE)
ts_plot(bfch_ts, # multivariate time series object
type = "single",
title = "U.S. Average Price of Beef vs. Chicken",
Ytitle = "Cost per Pound",
Xtitle = "Years",
Xgrid = TRUE,
Ygrid = TRUE)
## Plot time series with "dygraph" function from "dygraphs" package
dygraph(bfch_ts, # multivariate time series object
main = "U.S. Average Price of Beef vs. Chicken") %>%
dyAxis("y",label = "Cost per Pound") %>%
dySeries("Series 1",axis='y',label = "Ground Beef", color="red") %>%
dySeries("Series 2",axis = 'y',label = "Whole Chicken",color = "orange") %>%
dyLegend()